rm(list = ls())
gc()

library(lubridate)
library(tidyverse)
library(foreign)
library(ggplot2)
library(ggthemes)
library(estimatr)
library(mice)
library(ggpubr)

df_long = readRDS("data/baseline_clean_mi.rds")

df = as.mids(df_long)

indices = df$data %>% 
  select(c(response_num, names(df$data)[str_detect(names(df$data), "^index")]))

covars = c("syr_origin", "syr_origin_urban", "location_its", "sick", "head_educ2", "toddler", "elderly", "female_headed_hh", "location_district", "hezb_control")
covars2 = c("syr_origin_urban", "location_its", "sick", "head_educ2", "toddler", "elderly", "female_headed_hh", "hezb_control")

source("functions/plot_it.R")
source("functions/effect_plotter3.R")

# Individual regressions -----------------------------------------------------------
predictors = names(indices)[2:length(indices)]

for(i in 1:length(predictors)){
  if(predictors[i] != "index_mobility"){
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("rtrn_head_12", paste(c(predictors[i], covars), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_12"))
  }else{
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("rtrn_head_12", paste(c(predictors[i], covars2), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_12"))
  }
}

dfplot1 = reg1

for(i in 2:length(predictors)){
  dfplot1 = dfplot1 %>% bind_rows(eval(parse(text = paste0("reg", i))))
}

for(i in 1:length(predictors)){
  if(predictors[i] != "index_mobility"){
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("rtrn_head_ever", paste(c(predictors[i], covars), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_ever"))
  }else{
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("rtrn_head_ever", paste(c(predictors[i], covars2), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_ever"))
  }
}

for(i in 1:length(predictors)){
  dfplot1 = dfplot1 %>% bind_rows(eval(parse(text = paste0("reg", i))))
}


for(i in 1:length(predictors)){
  if(predictors[i] != "index_mobility"){
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("prep_pca", paste(c(predictors[i], covars), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "prep_pca"))
  }else{
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("prep_pca", paste(c(predictors[i], covars2), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "prep_pca"))
  }
}

for(i in 1:length(predictors)){
  dfplot1 = dfplot1 %>% bind_rows(eval(parse(text = paste0("reg", i))))
}


for(i in 1:length(predictors)){
  if(predictors[i] != "index_mobility"){
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("expect2yr_syria", paste(c(predictors[i], covars), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "expect2yr_syria"))
  }else{
    assign(paste0("reg", i), with(df, lm_robust(as.formula(paste("expect2yr_syria", paste(c(predictors[i], covars2), collapse = " + "), sep = " ~ ")), clusters = location_town, weights = weights)) %>% pool() %>% summary(conf.int = TRUE) %>% mutate(outcome = "expect2yr_syria"))
  }
}

for(i in 1:length(predictors)){
  dfplot1 = dfplot1 %>% bind_rows(eval(parse(text = paste0("reg", i))))
}
reg1

dfplot1 = dfplot1 %>% mutate(bold = "plain") %>% 
  mutate(term = factor(term, levels = c("index_safety_syria", "index_regime_control",
                                        "index_econ_syria", "index_services_syria", "index_networks_syria",
                                        "index_econ_leb",
                                        "index_services_leb", "index_networks_lebanon", "index_soc_leb",
                                        "index_legal_leb", "index_mobility", "index_family",
                                        "index_info_quality"))) %>% 
  mutate(term = recode(term, "index_econ_leb" = "Economic well-being ",
                       "index_soc_leb" = "Social well-being ",
                       "index_services_leb" = "Services ",
                       "index_legal_leb" = "Legal conditions ",
                       "index_safety_syria" = "Safety",
                       "index_econ_syria" = "Economic well-being",
                       "index_services_syria" = "Services",
                       "index_networks_syria" = "Networks",
                       "index_mobility" = "Log travel distance", 
                       "index_family" = "Log household size",
                       "index_networks_lebanon" = "Networks ",
                       "index_info_quality" = "Confidence in information",
                       "index_regime_control" = "Regime control")) %>% 
  #mutate(term = fct_rev(term)) %>% 
  mutate(outcome = recode(outcome, "rtrn_head_12" = "Return in 12 months",
                          "rtrn_head_ever" = "Return ever",
                          "prep_pca" = "Prepare to return", 
                          "expect2yr_syria" = "Return in 2 years"),
         outcome = factor(outcome, levels = c("Return in 12 months", "Prepare to return", "Return in 2 years", 
                                              "Return ever"))
  )

dfplot1 = dfplot1 %>% arrange(outcome, term)
dfplot1 = dfplot1 %>% filter(!is.na(term))

dfplot1 %>% 
  filter(outcome == "Return in 12 months") %>% 
  mutate(predictor = as.character(term)) %>% 
  mutate(paste = c(rep(" Syria", 5),
                   rep("Lebanon", 5),
                   rep("", 3)
                   ),
         predictor = paste0(predictor, paste)) %>% 
  select(predictor, estimate, std.error, p.value) %>% 
  rename(Predictor = predictor, Estimate = estimate, Std.Error = std.error) %>% 
  xtable::xtable(digits = 3, label = "tab:short_individual", 
                 caption = "Regression results for the predictors of return intentions in 12 months (using an individual index in each regression). 
                 The regression includes controls, district fixed effects in Lebanon, and governorate of origin fixed effects in Syria. 
               Standard errors are clustered at the town level in Lebanon.") %>% 
  xtable::print.xtable(table.placement = "H", include.rownames = F, file = "tables/short_individual.tex")

dfplot1 %>% 
  filter(outcome == "Prepare to return") %>% 
  mutate(predictor = as.character(term)) %>% 
  mutate(paste = c(rep(" Syria", 5),
                   rep("Lebanon", 5),
                   rep("", 3)
  ),
  predictor = paste0(predictor, paste)) %>% 
  select(predictor, estimate, std.error, p.value) %>% 
  rename(Predictor = predictor, Estimate = estimate, Std.Error = std.error) %>% 
  xtable::xtable(digits = 3, label = "tab:prep_individual", 
                 caption = "Regression results for the predictors of return preparations (using an individual index in each regression). 
                 The regression includes controls, district fixed effects in Lebanon, and governorate of origin fixed effects in Syria. 
               Standard errors are clustered at the town level in Lebanon.") %>% 
  xtable::print.xtable(table.placement = "H", include.rownames = F, file = "tables/prep_individual.tex")

# Regressions all indices-------------------------------------------------------------
predictors = names(indices)[2:length(indices)]
predictors_a = predictors[which(predictors!="index_services_syria")]
predictors_b = predictors[which(predictors!="index_safety_syria")]

reg1a = with(df, lm_robust(as.formula(paste("rtrn_head_12", paste(c(predictors_a, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_12")
reg1b = with(df, lm_robust(as.formula(paste("rtrn_head_12", paste(c(predictors_b, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_12")
reg1 = reg1a %>% bind_rows(reg1b %>% filter(term == "index_services_syria"))

reg2a = with(df, lm_robust(as.formula(paste("rtrn_head_ever", paste(c(predictors_a, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_ever")
reg2b = with(df, lm_robust(as.formula(paste("rtrn_head_ever", paste(c(predictors_b, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "rtrn_head_ever")
reg2 = reg2a %>% bind_rows(reg2b %>% filter(term == "index_services_syria"))

reg3a = with(df, lm_robust(as.formula(paste("prep_pca", paste(c(predictors_a, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "prep_pca")
reg3b = with(df, lm_robust(as.formula(paste("prep_pca", paste(c(predictors_b, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "prep_pca")
reg3 = reg3a %>% bind_rows(reg3b %>% filter(term == "index_services_syria"))

reg4a = with(df, lm_robust(as.formula(paste("expect2yr_syria", paste(c(predictors_a, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "expect2yr_syria")
reg4b = with(df, lm_robust(as.formula(paste("expect2yr_syria", paste(c(predictors_b, covars2), collapse = " + "), sep = " ~ ")), weights = weights, clusters = location_town)) %>% pool %>% summary(conf.int = TRUE) %>% mutate(outcome = "expect2yr_syria")
reg4 = reg4a %>% bind_rows(reg4b %>% filter(term == "index_services_syria"))

dfplot2 = reg1 %>% bind_rows(reg2) %>% bind_rows(reg3) %>% bind_rows(reg4)

dfplot2 = dfplot2 %>% filter(term %in% names(indices))

dfplot2 = dfplot2 %>% mutate(bold = "plain") %>% 
  mutate(term = factor(term, levels = c("index_safety_syria", "index_regime_control",
                                        "index_econ_syria", "index_services_syria", "index_networks_syria",
                                        "index_econ_leb",
                                        "index_services_leb", "index_networks_lebanon", "index_soc_leb",
                                        "index_legal_leb", "index_mobility", "index_family",
                                        "index_info_quality"))) %>% 
  mutate(term = recode(term, "index_econ_leb" = "Economic well-being ",
                       "index_soc_leb" = "Social well-being ",
                       "index_services_leb" = "Services ",
                       "index_legal_leb" = "Legal conditions ",
                       "index_safety_syria" = "Safety",
                       "index_econ_syria" = "Economic well-being",
                       "index_services_syria" = "Services",
                       "index_networks_syria" = "Networks",
                       "index_mobility" = "Log travel distance", 
                       "index_family" = "Log household size",
                       "index_networks_lebanon" = "Networks ",
                       "index_info_quality" = "Confidence in information",
                       "index_regime_control" = "Regime control")) %>% 
  #mutate(term = fct_rev(term)) %>% 
  mutate(outcome = recode(outcome, "rtrn_head_12" = "Return in 12 months",
                          "rtrn_head_ever" = "Return ever",
                          "prep_pca" = "Prepare to return", 
                          "expect2yr_syria" = "Return in 2 years"),
         outcome = factor(outcome, levels = c("Return in 12 months", "Prepare to return", "Return in 2 years", 
                                              "Return ever"))
  )


dfplot2 = dfplot2 %>% arrange(outcome, term)

dfplot2 %>% 
  filter(outcome == "Return in 12 months") %>% 
  mutate(predictor = as.character(term)) %>% 
  mutate(paste = c(rep(" Syria", 5),
                   rep("Lebanon", 5),
                   rep("", 3)
  ),
  predictor = paste0(predictor, paste)) %>% 
  select(predictor, estimate, std.error, p.value) %>% 
  rename(Predictor = predictor, Estimate = estimate, Std.Error = std.error) %>% 
  xtable::xtable(digits = 3, label = "tab:short_all", 
                 caption = "Regression results for the predictors of return intentions in 12 months (using all indices in each regression). 
                 The regression includes controls, district fixed effects in Lebanon, and governorate of origin fixed effects in Syria. 
               Standard errors are clustered at the town level in Lebanon.") %>% 
  xtable::print.xtable(table.placement = "H", include.rownames = F, file = "tables/short_all.tex")



dfplot1 %>% 
  filter(outcome == "Prepare to return") %>% 
  mutate(predictor = as.character(term)) %>% 
  mutate(paste = c(rep(" Syria", 5),
                   rep("Lebanon", 5),
                   rep("", 3)
  ),
  predictor = paste0(predictor, paste)) %>% 
  select(predictor, estimate, std.error, p.value) %>% 
  rename(Predictor = predictor, Estimate = estimate, Std.Error = std.error) %>% 
  xtable::xtable(digits = 3, label = "tab:prep_all", 
                 caption = "Regression results for the predictors of return preparations (using all indices in each regression). 
                 The regression includes controls, district fixed effects in Lebanon, and governorate of origin fixed effects in Syria. 
               Standard errors are clustered at the town level in Lebanon.") %>% 
  xtable::print.xtable(table.placement = "H", include.rownames = F, file = "tables/prep_all.tex")


dfplot1$type = "indiv_indices"
dfplot2$type = "all_indices"


dfplot_complete = dfplot1 %>% bind_rows(dfplot2)
dfplot_complete$type = dfplot_complete$type %>% factor(levels = c("indiv_indices", "all_indices"))

dfplot_complete = dfplot_complete %>% arrange(outcome, type, term)

# 12 months plot ---------------------------------------------------------------
dfplot_12mon = dfplot_complete[dfplot_complete$outcome == "Return in 12 months",]
dfplot_12mon = dfplot_12mon %>% select(- one_of("2.5 %", "97.5 %", "statistic", "p.value", "bold", "outcome"))
dfplot_12mon$group = rep(c(rep("group1", 5), rep("group2", 5), rep("group3", 2), "group4"), 2)

dfplot_12mon$code = rep(1:13, 2)

dfplot_12mon = dfplot_12mon %>% 
  bind_rows(tibble(term = rep(c("Pull factors in Syria", "", "Push factors from Lebanon", "",
                            "Mobility cost", "", "Information", ""), 2),
            group = rep("empty", 16),
            code = rep(0, 16),
            estimate = rep(NA, 16),
            std.error = rep(NA, 16),
            df = rep(NA, 16),
            type = c(rep("indiv_indices", 8), rep("all_indices", 8))
            ))

dfplot_12mon$order = c(41, 40, 39, 38, 37,
                34, 33, 32, 31, 30,
                27, 26,
                23,
                20, 19, 18, 17, 16,
                13, 12, 11, 10, 9,
                6, 5,
                2, 
                42, 36, 35, 29, 28, 25, 24, 22, 21, 15, 14, 8, 7, 4, 3, 1
                )

dfplot_12mon = dfplot_12mon %>% arrange(-order)
dfplot_12mon = dfplot_12mon %>% select(term, group, code, estimate, std.error, df, order, type)

dfplot_12mon$z95 = qt(.975, dfplot_12mon$df)
dfplot_12mon$z90 = qt(.95, dfplot_12mon$df)

dfplot_12mon = dfplot_12mon %>% rename(pe = estimate, se = std.error, name = term)

dfplot_12mon$order[dfplot_12mon$order > 21] = dfplot_12mon$order[dfplot_12mon$order > 21] - 21 

p1 = plotit2(d = dfplot_12mon,
            effect.label = "Effect on Likelihood of \nIntending to Return",
            x.lower = -0.05,
            x.upper = 0.05,
            labs = T,
            title = "Return in 12 months",
            legend = F)
p1

# Prepare to return -------------------------------------------------------
dfplot_prep = dfplot_complete[dfplot_complete$outcome == "Prepare to return",]
dfplot_prep = dfplot_prep %>% select(- one_of("2.5 %", "97.5 %", "statistic", "p.value", "bold", "outcome"))
dfplot_prep$group = rep(c(rep("group1", 5), rep("group2", 5), rep("group3", 2), "group4"), 2)

dfplot_prep$code = rep(1:13, 2)

dfplot_prep = dfplot_prep %>% 
  bind_rows(tibble(term = rep(c("Pull factors in Syria", "", "Push factors from Lebanon", "",
                                "Mobility cost", "", "Information", ""), 2),
                   group = rep("empty", 16),
                   code = rep(0, 16),
                   estimate = rep(NA, 16),
                   std.error = rep(NA, 16),
                   df = rep(NA, 16),
                   type = c(rep("indiv_indices", 8), rep("all_indices", 8))
  ))

dfplot_prep$order = c(41, 40, 39, 38, 37,
                34, 33, 32, 31, 30,
                27, 26,
                23,
                20, 19, 18, 17, 16,
                13, 12, 11, 10, 9,
                6, 5,
                2, 
                42, 36, 35, 29, 28, 25, 24, 22, 21, 15, 14, 8, 7, 4, 3, 1
)

dfplot_prep = dfplot_prep %>% arrange(-order)
dfplot_prep = dfplot_prep %>% select(term, group, code, estimate, std.error, df, order, type)


dfplot_prep$z95 = qt(.975, dfplot_prep$df)
dfplot_prep$z90 = qt(.95, dfplot_prep$df)

dfplot_prep = dfplot_prep %>% rename(pe = estimate, se = std.error, name = term)

dfplot_prep$order[dfplot_prep$order > 21] = dfplot_prep$order[dfplot_prep$order > 21] - 21 

p2 = plotit2(d = dfplot_prep, effect.label = "Effect on Likelihood of \nPreparing to Return",
            x.lower = -0.2, x.upper = 0.3, labs = F, title = "Prepare to return", legend = TRUE)

p2
ggarrange(p1, p2, widths = c(1.55, 1), common.legend = T, legend = "bottom")

ggarrange(p1, p2, widths = c(1.55, 1), common.legend = T, legend = "bottom") %>% 
  ggexport(filename = "figures/rtrn_short.png", height = 1600, width = 1500, res = 200)

